Homework: https://work.caltech.edu/homework/hw2.pdf
Answers:
Answer key: https://work.caltech.edu/homework/hw2_sol.pdf
In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
from numpy.linalg import inv
In [23]:
class Coin:
    '''A fair coin that remembers the outcomes of its most recent batch of flips.

    Each outcome is stored as 1 (heads) or 0 (tails).
    '''

    def __init__(self):
        # Nothing has been flipped yet.
        self.flips = np.array([])

    def flip(self, number_of_flips=1):
        '''Flip the coin `number_of_flips` times, replacing any earlier outcomes.

        Returns the array of new outcomes (the same object stored on the coin).
        '''
        outcomes = np.random.randint(0, 2, number_of_flips)
        self.flips = outcomes
        return outcomes

    def frequency_of_heads(self):
        '''Number of heads in the most recent batch of flips.'''
        return np.sum(self.flips)

    def fraction_of_heads(self):
        '''Share of heads (between 0 and 1) in the most recent batch of flips.'''
        return np.mean(self.flips)
In [24]:
# Build the pool of 1000 independent coins used by the experiment cells.
coins = []
for i in range(1000):
    coins.append(Coin())
len(coins)
Out[24]:
In [41]:
def experiment(coins, number_of_flips=10):
    '''Flip every coin and report the fraction of heads of four coins of interest.

    Parameters
    ----------
    coins : sequence of objects exposing `flip(n)`, `frequency_of_heads()`
        and `fraction_of_heads()` (e.g. `Coin`). Must be non-empty.
    number_of_flips : int
        How many times each coin is flipped in this run.

    Returns
    -------
    numpy array of four fractions of heads, in this order:
    the first coin, a coin picked uniformly at random, the first coin with the
    fewest heads, and the first coin with the most heads.
    '''
    for coin in coins:
        coin.flip(number_of_flips)

    frequencies = [coin.frequency_of_heads() for coin in coins]
    fractions = [coin.fraction_of_heads() for coin in coins]

    indices_of_interest = [
        0,
        # Draw from the actual number of coins rather than a fixed 1000, so the
        # random pick is always a valid index and covers every coin.
        np.random.randint(len(coins)),
        # argmin/argmax return the first occurrence on ties.
        int(np.argmin(frequencies)),
        int(np.argmax(frequencies)),
    ]
    return np.array([fractions[index] for index in indices_of_interest])
# Single trial run: 10 flips per coin, shown as the cell output.
experiment(coins, number_of_flips=10)
Out[41]:
In [43]:
# Repeat the coin experiment N times and average the four reported fractions.
N = 100000
results = np.zeros(4)
for n in range(N):
    results = results + experiment(coins)
# Last expression of the cell: the averaged fractions.
results / N
Out[43]:
Answers 1 & 2:
indices of interest: array([0, random_int, min_freq, max_freq])
average fraction of heads: array([ 0.500242, 0.499549, 0.03762 , 0.962324])
In [131]:
class Line:
    '''A straight line in the (x1, x2) plane, used as a linear decision boundary.

    The line is built in one of two ways:

    * from two points: `x1_s` holds their two x1 coordinates and `x2_s` their
      two x2 coordinates (`weights` left as None);
    * from a weight vector `weights = (w0, w1, w2)` describing the boundary
      w0 + w1*x1 + w2*x2 = 0 (`x1_s` and `x2_s` are ignored).

    Attributes
    ----------
    slope, intercept : float
        The boundary written as x2 = intercept + slope * x1.
    orientation : float
        +1.0 or -1.0. Dividing the weights by w2 loses the sign of w2, so it is
        kept here; without it a classifier with w2 < 0 would come out inverted.
        Lines built from two points always have orientation +1.0.

    Raises
    ------
    ValueError
        If the boundary is vertical (w2 == 0, or both points share the same
        x1), since it then has no slope/intercept form.
    '''

    def __init__(self, x1_s, x2_s, weights=None):
        if weights is not None:
            assert len(weights) == 3
            # float() guards against integer division on integer weights.
            w0, w1, w2 = [float(w) for w in weights]
            if w2 == 0:
                raise ValueError("weights with w2 == 0 describe a vertical line")
            self.intercept = -(w0 / w2)
            self.slope = -(w1 / w2)
            self.orientation = 1.0 if w2 > 0 else -1.0
        else:
            assert len(x1_s) == 2
            assert len(x2_s) == 2
            rise = float(x2_s[1]) - float(x2_s[0])
            run = float(x1_s[1]) - float(x1_s[0])
            if run == 0:
                raise ValueError("the two points share the same x1 (vertical line)")
            self.slope = rise / run
            self.intercept = x2_s[0] - self.slope * x1_s[0]
            self.orientation = 1.0

    def get_y(self, x):
        '''x2 coordinate of the line at x1 = x.'''
        return self.intercept + self.slope * x

    def get_sign(self, x1, x2):
        '''Classify the point (x1, x2) as +1, -1, or 0 when it lies on the line.

        For a line built from weights this equals sign(w0 + w1*x1 + w2*x2);
        for a line built from two points, +1 means the point is above the line.
        '''
        return self.orientation * np.sign(- self.intercept - self.slope*x1 + x2)

    def get_weights(self):
        '''Weights (w0, w1, w2), scaled so |w2| == 1, with sign(w . (1, x1, x2)) == get_sign(x1, x2).'''
        return self.orientation * np.array([-self.intercept, -self.slope, 1])
In [132]:
# Bounds of the square input domain; the x2 axis uses the same range as x1.
x_min, x_max = -1, 1
y_min, y_max = x_min, x_max
In [172]:
def select_target_function(plot = False):
    '''Create a random target line through two uniformly drawn points.

    The x1 coordinates of the two points are drawn from [x_min, x_max] and the
    x2 coordinates from [y_min, y_max]. When `plot` is true, the two points and
    the line across the whole x1 range are drawn on the current matplotlib axes.

    Returns the resulting `Line`.
    '''
    endpoints_x1 = list(np.random.uniform(x_min, x_max, size=2))
    endpoints_x2 = list(np.random.uniform(y_min, y_max, size=2))
    target_function = Line(endpoints_x1, endpoints_x2)

    if not plot:
        return target_function

    line_at_edges = [target_function.get_y(x_min), target_function.get_y(x_max)]
    plt.axis([x_min, x_max, y_min, y_max])
    plt.scatter(endpoints_x1, endpoints_x2, color='#006400')
    plt.plot([x_min, x_max], line_at_edges, color='green')
    return target_function
Out[172]:
In [174]:
N = 100

# Create the target line here: no visible cell assigns target_function,
# target_function_x or target_function_y, so this cell (and the later ones that
# read them) would fail on a fresh kernel. select_target_function() returns only
# the Line, not the two defining points plotted below, hence the explicit draw.
target_function_x = list(np.random.uniform(x_min, x_max, size=2))
target_function_y = list(np.random.uniform(y_min, y_max, size=2))
target_function = Line(target_function_x, target_function_y)

# N training points drawn uniformly from the square, labelled by the target.
sample_x1 = list(np.random.uniform(-1, 1, size=N))
sample_x2 = list(np.random.uniform(-1, 1, size=N))
sample_y = [target_function.get_sign(x1, x2) for x1, x2 in zip(sample_x1, sample_x2)]

# Target line (green), its two defining points, and the labelled sample.
plt.axis([x_min, x_max, y_min, y_max])
plt.scatter(target_function_x, target_function_y, color='#006400')
plt.plot([x_min, x_max], [target_function.get_y(x_min), target_function.get_y(x_max)], color='green')
plt.scatter(sample_x1, sample_x2, c=sample_y, marker='x')
Out[174]:
In [175]:
# Design matrix with one row per point: (1, x1, x2). The leading column of ones
# is the bias coordinate.
X = np.vstack((np.ones(N), sample_x1, sample_x2)).T
# Labels as a numpy array, aligned with the rows of X.
y = np.array(sample_y)
In [176]:
# Least-squares fit w = pinv(X) . y. The pseudo-inverse comes from
# np.linalg.pinv instead of explicitly inverting X^T X, which is numerically
# fragile and raises when X^T X is singular.
X_dagger = np.linalg.pinv(X)
w_lin_reg = np.dot(X_dagger, y)
print("Linear regression weights: {}".format(w_lin_reg))
# Boundary implied by the fitted weights, used for plotting and classification.
predicted_function = Line(None, None, w_lin_reg)
In [177]:
# Overlay the target line (green), the fitted line (purple), the two points that
# define the target, and the labelled training sample.
x_edges = [x_min, x_max]
target_at_edges = [target_function.get_y(x_min), target_function.get_y(x_max)]
predicted_at_edges = [predicted_function.get_y(x_min), predicted_function.get_y(x_max)]

plt.axis([x_min, x_max, y_min, y_max])
plt.scatter(target_function_x, target_function_y, color='#006400')
plt.plot(x_edges, target_at_edges, color='green')
plt.plot(x_edges, predicted_at_edges, color='purple')
plt.scatter(sample_x1, sample_x2, c=sample_y, marker='x')
Out[177]:
In [178]:
# Classify every training point with the fitted line.
predicted_signs = []
for x1, x2 in zip(sample_x1, sample_x2):
    predicted_signs.append(predicted_function.get_sign(x1, x2))
y_pred = np.array(predicted_signs)

# Boolean mask of misclassified points; its mean is the in-sample error.
results = (y_pred != y)
np.mean(results)
Out[178]:
In [179]:
def sample_data_set(target_function, number_of_points=100):
    '''Draw a labelled data set from the square [-1, 1] x [-1, 1].

    Parameters
    ----------
    target_function : object with a `get_sign(x1, x2)` method that labels a point.
    number_of_points : int, number of points to draw.

    Returns
    -------
    (X, y) where X has one row (1, x1, x2) per point and y holds the label that
    `target_function.get_sign` assigned to each row.
    '''
    # All x1 values are drawn first, then all x2 values.
    x1_values = list(np.random.uniform(-1, 1, size=number_of_points))
    x2_values = list(np.random.uniform(-1, 1, size=number_of_points))

    bias_column = np.ones(number_of_points)
    X = np.vstack((bias_column, x1_values, x2_values)).T

    labels = []
    for x1, x2 in zip(x1_values, x2_values):
        labels.append(target_function.get_sign(x1, x2))
    y = np.array(labels)
    return X, y
def linear_regression(X, y):
    '''Least-squares weights for the linear model X . w ~ y.

    Parameters
    ----------
    X : array-like of shape (n_points, n_features); anything np.asarray accepts.
    y : array-like of length n_points.

    Returns
    -------
    1-D numpy array of n_features weights, w = pinv(X) . y.

    The pseudo-inverse is computed with np.linalg.pinv rather than by
    explicitly inverting X^T X, so a rank-deficient X yields the minimum-norm
    solution instead of raising a singular-matrix error.
    '''
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    return np.dot(np.linalg.pinv(X), y)
In [137]:
# Repeat: draw 100 points, fit linear regression, record the in-sample error.
predicted_functions = []
in_sample_errors = []
number_of_experiments = 1000
for i in range(number_of_experiments):
    X, y = sample_data_set(target_function, number_of_points=100)
    weights = linear_regression(X, y)
    # Keep the fitted boundary as a Line for the cell that evaluates it on
    # fresh points.
    predicted_function = Line(None, None, weights)
    predicted_functions.append(predicted_function)
    # Classify with sign(X . w) directly: the slope/intercept form divides by
    # w2, which can lose the orientation of the classifier when w2 < 0.
    predictions = np.sign(np.dot(X, weights))
    misclassified = predictions != y
    in_sample_error = np.mean(misclassified)
    in_sample_errors.append(in_sample_error)
average_in_sample_error = np.mean(in_sample_errors)
print(average_in_sample_error)
print(len(predicted_functions))
In [138]:
# One set of 1000 fresh points, shared by every fitted line.
X_out, y_out = sample_data_set(target_function, number_of_points=1000)

out_of_sample_errors = []
for predicted_function in predicted_functions:
    # Columns 1 and 2 of each row are the point's x1 and x2 coordinates.
    signs = [predicted_function.get_sign(row[1], row[2]) for row in X_out]
    predictions = np.array(signs)
    misclassified = (predictions != y_out)
    out_of_sample_error = np.mean(misclassified)
    out_of_sample_errors.append(out_of_sample_error)

average_out_of_sample_error = np.mean(out_of_sample_errors)
print(average_out_of_sample_error)
In [162]:
def PLA(X, y, initial_weights, max_iterations = 15000, verbose=False):
    '''Perceptron Learning Algorithm starting from `initial_weights`.

    On each pass all points are classified with sign(X . w); if any are wrong,
    one misclassified point is picked at random and w is moved by x * y.

    Parameters
    ----------
    X : 2-D numpy array, one row per point.
    y : 1-D numpy array of labels, aligned with the rows of X.
    initial_weights : array-like starting weights; never modified.
    max_iterations : int, maximum number of weight updates.
    verbose : bool, print progress when true.

    Returns
    -------
    (count, w). `count` is the number of updates performed plus one, so it is
    never 0 and a caller can test it for truthiness; it is None when the update
    budget ran out. No convergence check is made after the final permitted
    update. `w` is the final weight vector.
    '''
    # Float copy: leaves the caller's array untouched and lets the in-place
    # update below work even when integer starting weights are supplied.
    w = np.array(initial_weights, dtype=float)
    updates = 0
    while updates < max_iterations:
        if verbose:
            print("Iteration {} w {}".format(updates, w))
        predictions = np.sign(np.dot(X, w))
        misclassified = predictions != y
        num_misclassified = np.sum(misclassified)
        if num_misclassified == 0:
            break
        if verbose:
            print("Still misclassified: {}".format(num_misclassified))
        chosen = np.random.choice(np.where(misclassified)[0])
        w += X[chosen] * y[chosen]
        updates += 1
    if verbose:
        print("Returning iter {}".format(updates))
        print("")
    return (updates+1 if updates < max_iterations else None, w)
In [164]:
# Average number of PLA iterations when PLA starts from the linear-regression
# weights fitted on the same 10 points.
number_of_experiments = 10000
total_iterations = 0
completed_runs = 0
for run in range(number_of_experiments):
    X_pla, y_pla = sample_data_set(target_function, number_of_points=10)
    # Fit on this run's own sample, not on X, y left over from an earlier cell.
    lin_reg_weights = linear_regression(X_pla, y_pla)
    pla_iterations, pla_weights = PLA(X_pla, y_pla, lin_reg_weights)
    if pla_iterations is None:
        # PLA ran out of its update budget without separating the points.
        print("Complain loudly")
        break
    total_iterations += pla_iterations
    completed_runs += 1

# Average over the runs that actually finished, so an early stop does not
# understate the result.
if completed_runs:
    iterations = total_iterations * 1.0 / completed_runs
    print("PLA took {} iterations on average to converge".format(iterations))
In [199]:
training_set_size = 20

# Generate X's: rows are (1, x1, x2) with x1, x2 uniform in [-1, 1].
sample_x1 = list(np.random.uniform(-1, 1, size=training_set_size))
sample_x2 = list(np.random.uniform(-1, 1, size=training_set_size))
X = np.array([np.ones(len(sample_x1)), sample_x1, sample_x2]).transpose()

# Generate y's: noiseless target sign(x1^2 + x2^2 - 0.6).
sample_y = [np.sign(x[1] * x[1] + x[2] * x[2] - 0.6) for x in X]
y = np.array(sample_y)

# Flip signs of random 10% of points to introduce noise.
# `size` must be an integer; np.round returns a float, which np.random.choice
# rejects on current NumPy.
number_of_flipped_labels = int(np.round(len(y) / 10.0))
random_index = np.random.choice(len(y), size=number_of_flipped_labels, replace=False)
y[random_index] *= -1

# Plot points, coloured by the noisy labels y (sample_y holds the labels from
# before the flip, so it would hide the noise).
plt.axis([x_min, x_max, y_min, y_max])
plt.scatter(sample_x1, sample_x2, c=y, marker='x')
Out[199]:
In [203]:
# In-sample error of plain linear regression on the noisy circular target,
# averaged over many independent training sets.
avg_in_sample_error = 0
number_of_experiments = 1000
training_set_size = 1000
# `size` for np.random.choice must be an integer; np.round returns a float.
number_of_flipped_labels = int(np.round(training_set_size / 10.0))

for i in range(number_of_experiments):
    # Generate X's: rows are (1, x1, x2).
    sample_x1 = list(np.random.uniform(-1, 1, size=training_set_size))
    sample_x2 = list(np.random.uniform(-1, 1, size=training_set_size))
    X = np.array([np.ones(len(sample_x1)), sample_x1, sample_x2]).transpose()

    # Generate y's: sign(x1^2 + x2^2 - 0.6).
    y = np.sign(X[:, 1] * X[:, 1] + X[:, 2] * X[:, 2] - 0.6)

    # Flip signs of random 10% of points to introduce noise.
    random_index = np.random.choice(len(y), size=number_of_flipped_labels, replace=False)
    y[random_index] *= -1

    weights = linear_regression(X, y)
    # Classify with sign(X . w) directly. Going through a slope/intercept line
    # divides by w2 and loses the classifier's orientation when w2 < 0, and the
    # sign of w2 is not fixed for this target.
    predictions = np.sign(np.dot(X, weights))
    misclassified = predictions != y
    avg_in_sample_error += np.mean(misclassified)

avg_in_sample_error /= number_of_experiments
print(avg_in_sample_error)
In [234]:
# Fit linear regression on the transformed features (1, x1, x2, x1*x2, x1^2, x2^2)
# for many noisy training sets and collect the fitted weights.
number_of_experiments = 100
training_set_size = 1000
# Detailed per-run output is only shown for very small runs.
verbose = number_of_experiments < 3
# `size` for np.random.choice must be an integer; np.round returns a float.
number_of_flipped_labels = int(np.round(training_set_size / 10.0))

weights_NLT_ALL = []
for i in range(number_of_experiments):
    # Generate X's
    sample_x1 = list(np.random.uniform(-1, 1, size=training_set_size))
    sample_x2 = list(np.random.uniform(-1, 1, size=training_set_size))
    X = pd.DataFrame(
        np.array([np.ones(len(sample_x1)), sample_x1, sample_x2]).transpose(),
        columns=['x0', 'x1', 'x2'])
    if verbose:
        display(X.head())

    # Generate y's
    y = np.sign(X.x1**2 + X.x2**2 - 0.6)
    if verbose:
        display(y.head())

    # Flip signs of random 10% of points to introduce noise.
    # random_index holds row positions, so index positionally with .iloc.
    random_index = np.random.choice(len(y), size=number_of_flipped_labels, replace=False)
    y.iloc[random_index] *= -1
    if verbose:
        print("{} of {}: {}".format(number_of_flipped_labels, len(y), random_index))

    if verbose:
        # Plot points
        plt.axis([x_min, x_max, y_min, y_max])
        plt.scatter(sample_x1, sample_x2, c=y.values, marker='x')
        plt.show()

    if verbose:
        # Untransformed fit, shown for comparison only.
        weights = linear_regression(X.values, y.values)
        weights /= abs(weights[0])
        print("Linear regression weights: {}".format(weights))

    # Non-linear transformation: append x1*x2, x1^2 and x2^2 columns.
    X_nlt = X.copy()
    X_nlt['x12'] = X.x1*X.x2
    X_nlt['x11'] = X.x1**2
    X_nlt['x22'] = X.x2**2
    if verbose:
        display(X_nlt.head())

    weights_nlt = linear_regression(X_nlt.values, y.values)
    # Scale so the bias weight has magnitude 1, making runs comparable.
    weights_nlt /= abs(weights_nlt[0])
    if verbose:
        print("Linear regression weights (after non linear transformation): {}".format(weights_nlt))
    weights_NLT_ALL.append(weights_nlt)

# One row of weights per experiment.
weights_NLT_ALL_df = pd.DataFrame(weights_NLT_ALL)
display(weights_NLT_ALL_df.head())
display(weights_NLT_ALL_df.describe())
In [250]:
# Out-of-sample error of the transformed-feature hypotheses, measured on one
# fresh noisy test set that is shared by all fitted weight vectors.
avg_out_of_sample_error = 0
test_set_size = 1000

# Generate X's with the same feature order used for training:
# (1, x1, x2, x1*x2, x1^2, x2^2).
sample_x1 = list(np.random.uniform(-1, 1, size=test_set_size))
sample_x2 = list(np.random.uniform(-1, 1, size=test_set_size))
X = pd.DataFrame(
    np.array([np.ones(len(sample_x1)), sample_x1, sample_x2]).transpose(),
    columns=['x0', 'x1', 'x2'])
X['x12'] = X.x1*X.x2
X['x11'] = X.x1**2
X['x22'] = X.x2**2

# Generate y's
y = np.sign(X.x1**2 + X.x2**2 - 0.6)

# Flip signs of random 10% of points to introduce noise.
# `size` must be an integer; np.round returns a float, which np.random.choice
# rejects on current NumPy. random_index holds row positions, hence .iloc.
number_of_flipped_labels = int(np.round(len(y) / 10.0))
random_index = np.random.choice(len(y), size=number_of_flipped_labels, replace=False)
y.iloc[random_index] *= -1

features = X.values
labels = y.values
for w in weights_NLT_ALL:
    predictions = np.sign(np.dot(features, w))
    misclassified = predictions != labels
    avg_out_of_sample_error += np.mean(misclassified)

# Sum of the per-hypothesis errors and the number of hypotheses, then the mean.
print("{} {}".format(avg_out_of_sample_error, len(weights_NLT_ALL)))
avg_out_of_sample_error /= len(weights_NLT_ALL)
print(avg_out_of_sample_error)
In [ ]:
In [ ]: